{
 "metadata": {
  "name": "",
  "signature": "sha256:d61aca7b09d27726e1d16c369db0862155a496068f0deeccaf27f7f8a50294ac"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Notebook setup: IPython extensions, imports and the shared size constants.\n",
      "# Magics are written without a space after `%`: recent IPython releases parse\n",
      "# `% name` as a magic with an empty name and reject it.\n",
      "%load_ext autoreload\n",
      "%autoreload 2\n",
      "%matplotlib inline\n",
      "# NOTE(review): no cell in this notebook uses a Cython magic, and newer IPython exposes\n",
      "# the extension as `%load_ext Cython` -- confirm whether this line is still needed.\n",
      "%load_ext cythonmagic\n",
      "%config InlineBackend.figure_format = 'svg'\n",
      "import matplotlib\n",
      "import matplotlib.pyplot as plt\n",
      "import numpy as np\n",
      "\n",
      "from cython_lstm.network import Network\n",
      "from cython_lstm.neuron  import Neuron, LogisticNeuron, TanhNeuron, SoftmaxNeuron\n",
      "from cython_lstm.layers  import Layer, LoopLayer, SliceLayer, TileLayer, TemporalLayer, RecurrentLayer, RecurrentAveragingLayer, RecurrentMultiStageLayer\n",
      "# RecurrentGatedLayer is used by the wiring cells further down but was missing from the\n",
      "# import list (NameError on a fresh kernel).\n",
      "# NOTE(review): assumed to be exported by cython_lstm.layers like its siblings -- confirm.\n",
      "from cython_lstm.layers  import RecurrentGatedLayer\n",
      "from cython_lstm.trainer import Trainer\n",
      "from cython_lstm.dataset import create_xor_dataset, create_digit_dataset\n",
      "\n",
      "# Seed NumPy's global generator so the random inputs and initial states are repeatable.\n",
      "SEED = 0\n",
      "np.random.seed(SEED)\n",
      "\n",
      "SIZE = 10           # last axis of the input array; input size of the first layers\n",
      "INTERNAL_SIZE = 5   # output size of the hidden layers\n",
      "TIMESTEPS = 2       # first axis of the input array\n",
      "STREAMS = 2         # second axis of the input array"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "The autoreload extension is already loaded. To reload it, use:\n",
        "  %reload_ext autoreload\n",
        "The cythonmagic extension is already loaded. To reload it, use:\n",
        "  %reload_ext cythonmagic\n"
       ]
      }
     ],
     "prompt_number": 48
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Testing recurrent behavior\n",
      "\n",
      "## Forward propagation\n",
      "\n",
      "Forward propagation through time is simple, and should be memory-efficient. It requires some planning at the wiring stage to know when the output of one stage is ready for the next. Here we test this assumption by constructing two very similar networks.\n",
      "\n",
      "Both have an input layer, a gate, and a layer that averages the previous activation with the new one using the gate. In the first network the gate is fed by the input; in the second network, the gate is fed by the activation of the first layer.\n",
      "\n",
      "For each network we run the calculation twice: once through the network's internal loop, which does the bookkeeping for us, and once through an explicit loop that is easier to inspect. Both passes should return the same result, regardless of the network wiring."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Network 1: the raw input drives both the linear layer and the gate.\n",
      "net1 = Network()\n",
      "\n",
      "# tanh layer producing the new activation at every step\n",
      "linear_layer = TemporalLayer(SIZE, INTERNAL_SIZE, neuron=TanhNeuron)\n",
      "\n",
      "# logistic gate with a single output unit\n",
      "gate = TemporalLayer(SIZE, 1, neuron=LogisticNeuron)\n",
      "\n",
      "# combines the gate with the linear layer; conceptually two multiplications\n",
      "# joined by a sum, even though it is written as a single layer\n",
      "averaging_layer = RecurrentAveragingLayer(gate, linear_layer)\n",
      "\n",
      "# fan the input out to the gate and to the linear layer\n",
      "tiles = TileLayer()\n",
      "for consumer in (gate, linear_layer):\n",
      "    tiles.connect_to(consumer, temporal=True)\n",
      "\n",
      "linear_layer.connect_to(averaging_layer, temporal=True)\n",
      "\n",
      "# add Gaussian noise to the averaging layer's initial hidden state\n",
      "state_noise = np.random.standard_normal(averaging_layer._initial_hidden_state.shape)\n",
      "averaging_layer._initial_hidden_state += state_noise\n",
      "\n",
      "net1.add_layer(tiles, input=True)\n",
      "for hidden in (linear_layer, gate):\n",
      "    net1.add_layer(hidden)\n",
      "net1.add_layer(averaging_layer, output=True)\n",
      "\n",
      "# pass driven by the network's own loop; keep the last entry of the result\n",
      "recurrent_data = np.random.standard_normal([TIMESTEPS, STREAMS, SIZE]).astype(np.float32)\n",
      "out = net1.activate(recurrent_data)[-1]\n",
      "net1.clear()\n",
      "\n",
      "# manual pass: same data, one timestep at a time\n",
      "manual_layers = [net1.layers[index] for index in (1, 2, 3)]\n",
      "for layer in manual_layers:\n",
      "    layer.allocate_activation(TIMESTEPS, STREAMS)\n",
      "\n",
      "for t in range(TIMESTEPS):\n",
      "    frame = recurrent_data[t, :, :]\n",
      "    out1 = net1.layers[1].forward_propagate(frame)\n",
      "    out2 = net1.layers[2].forward_propagate(frame)\n",
      "    out3 = net1.layers[3].forward_propagate(out1)\n",
      "    # advance every layer's step counter only after all three have run\n",
      "    for layer in manual_layers:\n",
      "        layer.step += 1\n",
      "net1.clear()\n",
      "\n",
      "# both passes should agree\n",
      "print(\"Outputs are identical => \", np.allclose(out, out3))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Outputs are identical =>  True\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Alternate wiring diagram: now the output of the first layer feeds the gate."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Network 2: the gate is driven by the recurrent layer's activation instead of the raw input.\n",
      "net2 = Network()\n",
      "\n",
      "# build the three layers\n",
      "recurrent_layer = TemporalLayer(SIZE, INTERNAL_SIZE, neuron=TanhNeuron)\n",
      "gate = RecurrentGatedLayer(INTERNAL_SIZE, neuron=LogisticNeuron)\n",
      "averaging_layer = RecurrentAveragingLayer(gate, recurrent_layer)\n",
      "\n",
      "# wire them in the order the computation should proceed\n",
      "for downstream in (gate, averaging_layer):\n",
      "    recurrent_layer.connect_to(downstream, temporal=True)\n",
      "\n",
      "# the connection from averaging_layer back into recurrent_layer is left disabled:\n",
      "#averaging_layer.connect_to(recurrent_layer, temporal=True)\n",
      "\n",
      "# add Gaussian noise to the averaging layer's initial hidden state\n",
      "state_noise = np.random.standard_normal(averaging_layer._initial_hidden_state.shape)\n",
      "averaging_layer._initial_hidden_state += state_noise\n",
      "\n",
      "net2.add_layer(recurrent_layer, input=True)\n",
      "net2.add_layer(gate)\n",
      "net2.add_layer(averaging_layer, output=True)\n",
      "\n",
      "# pass driven by the network's own loop; keep the last entry of the result\n",
      "recurrent_data = np.random.standard_normal([TIMESTEPS, STREAMS, SIZE]).astype(np.float32)\n",
      "out = net2.activate(recurrent_data)[-1]\n",
      "net2.clear()\n",
      "\n",
      "# manual pass: same data, one timestep at a time\n",
      "manual_layers = [net2.layers[index] for index in (0, 1, 2)]\n",
      "for layer in manual_layers:\n",
      "    layer.allocate_activation(TIMESTEPS, STREAMS)\n",
      "\n",
      "for t in range(TIMESTEPS):\n",
      "    out1 = net2.layers[0].forward_propagate(recurrent_data[t, :, :])\n",
      "    out2 = net2.layers[1].forward_propagate(out1)\n",
      "    out3 = net2.layers[2].forward_propagate(out1)\n",
      "    # advance every layer's step counter only after all three have run\n",
      "    for layer in manual_layers:\n",
      "        layer.step += 1\n",
      "net2.clear()\n",
      "print(\"Outputs are identical => \", np.allclose(out, out3))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Outputs are identical =>  True\n"
       ]
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Backward propagation\n",
      "\n",
      "Backward propagation in general can be done nicely for well-designed graphs. However, whenever cycles are introduced we need to perform backpropagation through time. In this instance we want to make sure these operations are well defined, and that the internal bookkeeping is done correctly, so that the error signal is sent through all stages of the computational graph correctly.\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Build the network used for the backward pass: a recurrent layer, a gate and an\n",
      "# averaging layer, followed by a slice and a softmax output layer.\n",
      "PREDICTION_SIZE = 20  # output size of the softmax layer\n",
      "\n",
      "net2 = Network()\n",
      "\n",
      "# create the layers\n",
      "recurrent_layer      = TemporalLayer(SIZE, INTERNAL_SIZE, neuron = TanhNeuron)\n",
      "gate                 = RecurrentGatedLayer(INTERNAL_SIZE, neuron = LogisticNeuron)\n",
      "averaging_layer      = RecurrentAveragingLayer(gate, recurrent_layer)\n",
      "\n",
      "# NOTE(review): presumably SliceLayer(-1) keeps only the final timestep -- confirm against SliceLayer.\n",
      "temporal_slice       = SliceLayer(-1)\n",
      "\n",
      "softmax_output_layer = Layer(INTERNAL_SIZE, PREDICTION_SIZE, neuron=SoftmaxNeuron)\n",
      "\n",
      "# connect them in the order the computation should proceed\n",
      "recurrent_layer.connect_to(gate, temporal=True)\n",
      "recurrent_layer.connect_to(averaging_layer, temporal=True)\n",
      "\n",
      "averaging_layer.connect_to(temporal_slice)\n",
      "temporal_slice.connect_to(softmax_output_layer)\n",
      "\n",
      "# add Gaussian noise to the averaging layer's initial hidden state\n",
      "averaging_layer._initial_hidden_state += np.random.standard_normal(averaging_layer._initial_hidden_state.shape)\n",
      "\n",
      "net2.add_layer(recurrent_layer, input=True)\n",
      "net2.add_layer(gate)\n",
      "net2.add_layer(averaging_layer)\n",
      "net2.add_layer(temporal_slice)\n",
      "net2.add_layer(softmax_output_layer, output=True)\n",
      "\n",
      "# Draw one target class per stream, class k being weighted proportionally to k.\n",
      "# Targets are integer class indices of shape (STREAMS,), not one-hot rows: the softmax\n",
      "# error term indexes the activation as dEdy[np.arange(t.shape[0]), t], and a 2-D\n",
      "# one-hot `t` cannot be broadcast against that row index.\n",
      "# NOTE(review): confirm that Network.backpropagate passes softmax targets through unchanged.\n",
      "class_weights = np.arange(0., PREDICTION_SIZE)\n",
      "class_weights /= class_weights.sum()\n",
      "prediction_data = np.random.multinomial(1, class_weights, size=(STREAMS)).argmax(axis=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 50
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Display the layers currently registered on net2.\n",
      "net2.layers"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 51,
       "text": [
        "[<TemporalLayer {'output_size': 5, 'input_size': 10, 'activation': 'Tanh'}>,\n",
        " <RecurrentGatedLayer {'output_size': 1, 'input_size': 5, 'activation': 'Sigmoid'}>,\n",
        " <RecurrentAveragingLayer {'output_size': 5, 'input_size': '1 + 5', 'activation': 'a * b + (1 - a) * c'}>,\n",
        " <SliceLayer {'output_size': 5, 'input_size': 5, 'activation': ''}>,\n",
        " <Layer {'output_size': 20, 'input_size': 5, 'activation': 'Softmax'}>]"
       ]
      }
     ],
     "prompt_number": 51
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Reset the network, run a forward pass, then backpropagate against the sampled targets.\n",
      "# NOTE(review): the saved output of this cell is an AttributeError recorded at In[47],\n",
      "# which is older than the network-building cell's last run (In[50]) -- re-run to see\n",
      "# whether the failure persists.\n",
      "net2.clear()\n",
      "net2.activate(recurrent_data)\n",
      "net2.backpropagate(prediction_data)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Activating forward layers from TemporalLayer\n"
       ]
      },
      {
       "ename": "AttributeError",
       "evalue": "'NoneType' object has no attribute 'copy'",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-47-61ccf4c65fd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mnet2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mnet2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mactivate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecurrent_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnet2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackpropagate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprediction_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[0;32m/Users/jonathanraiman/Desktop/Coding/cython_lstm/cython_lstm/network.py\u001b[0m in \u001b[0;36mbackpropagate\u001b[0;34m(self, target)\u001b[0m\n\u001b[1;32m     51\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     52\u001b[0m                 \u001b[0mtarget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output_layer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_activate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     54\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     55\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;32m/Users/jonathanraiman/Desktop/Coding/cython_lstm/cython_lstm/layers/layer.py\u001b[0m in \u001b[0;36merror_activate\u001b[0;34m(self, target)\u001b[0m\n\u001b[1;32m    136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    137\u001b[0m         \u001b[0;31m# get the error here\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 138\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackpropagate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdEdy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mactivation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    140\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclear_weight_caches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;32m/Users/jonathanraiman/Desktop/Coding/cython_lstm/cython_lstm/neuron.py\u001b[0m in \u001b[0;36mdEdy\u001b[0;34m(y, t)\u001b[0m\n\u001b[1;32m    103\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mstaticmethod\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    104\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdEdy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m         \u001b[0mdEdy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    106\u001b[0m         \u001b[0mdEdy\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-=\u001b[0m \u001b[0;36m1.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    107\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mdEdy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
        "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'copy'"
       ]
      }
     ],
     "prompt_number": 47
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "a.pop()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "1"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Scratch cell: display the current contents of the list `a`.\n",
      "a"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
        "[]"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "a = [1, 2, 3]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Scratch cell: position of the first element equal to 3 in `a`.\n",
      "a.index(3)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 14,
       "text": [
        "2"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}